In [1]:
import GPy
import optunity
import sobol
import numpy as np
import matplotlib.pyplot as plt
import scipy
import scipy.stats

Task 2

In [4]:
def f(x, y):
    """Task-2 objective: a multimodal 2-D test surface.

    Works elementwise on scalars or numpy arrays (broadcasting).
    The point (0.5, -0.5) is labeled as the global minimum later in the
    notebook; f(0.5, -0.5) evaluates to 0.
    """
    quadratic = x**2 - x + y**2 + y
    oscillation = np.cos(2*np.pi*x - np.pi) + np.cos(2*np.pi*y + np.pi)
    return quadratic - oscillation + 2.5
In [65]:
# 2000-point evaluation grid spanning [-6, 6] in each coordinate.
axis_pts = np.linspace(-6, 6, 2000)
x, y = axis_pts, axis_pts.copy()
X, Y = np.meshgrid(x, y)
In [141]:
# Evaluate the objective on the full mesh grid (reused by the contour plots).
f_xy= f(X,Y)
In [139]:
# Filled contour plot of the true objective over the grid.
fig, ax = plt.subplots(figsize=(10, 6))
contours = ax.contourf(X, Y, f_xy, 50)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_title('f(x,y)')
fig.colorbar(contours)
plt.show()
In [159]:
# Expected Improvement acquisition (for minimization).
def u(x, y, gp=None):
    """Expected improvement of sampling f at (x, y) under the GP surrogate.

    :param x: candidate x coordinate
    :param y: candidate y coordinate
    :param gp: GPy regression model; defaults to the module-level GP_model,
        looked up at call time.
    :return: expected improvement (>= 0)
    """
    # BUG FIX: the original default `gp=GP_model` was evaluated once at
    # definition time, so the acquisition silently kept scoring against the
    # stale initial model even after GP_model was re-fit each iteration.
    if gp is None:
        gp = GP_model
    q = np.array([[x, y]])
    e_best = gp.Y.min()          # best (lowest) observed objective so far
    mean, var = gp.predict(q)
    std = np.sqrt(var)
    if not np.all(std > 0):
        # Degenerate posterior (no predictive uncertainty): EI is zero.
        return 0.0
    g = (e_best - mean) / std
    return std * (g * scipy.stats.norm.cdf(g) + scipy.stats.norm.pdf(g))
In [180]:
# Low-discrepancy (Sobol) initial design of 30 points in [-6, 6]^2.
number_of_samples = 30
parameterUpperLimits = np.array([6, 6])
parameterLowerLimits = np.array([-6, -6])
span = parameterUpperLimits - parameterLowerLimits
Q = []
for sample_index in range(number_of_samples):
    # i4_sobol returns (point in [0,1]^2, next seed); rescale to the box.
    x, y = sobol.i4_sobol(2, sample_index)[0] * span + parameterLowerLimits
    Q.append([x, y])
train = np.array(Q)
Q = train.copy()
In [167]:
# Evaluate the true objective at the design points; reshape to the (n, 1)
# column vector that GPy expects as regression targets.
E=f(*Q.T).reshape(-1,1)
In [168]:
# GP surrogate: RBF kernel plus a White-noise term on the 2-D inputs.
# NOTE(review): the hyperparameters are never optimized here (no .optimize()
# call) -- the notebook text below identifies this as the likely problem.
GP_model=GPy.models.GPRegression(Q,E,kernel=GPy.kern.RBF(2) + GPy.kern.White(2))
In [163]:
# All mesh points as an (n, 2) array of (x, y) rows for GP_model.predict.
coord = np.column_stack((X.ravel(), Y.ravel()))
In [78]:
# Surface plot of the GP posterior mean fitted to the initial Sobol design
# only (before any Bayesian-optimization samples are added).
fig = plt.figure(figsize=(6,6))
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(X, Y, GP_model.predict(coord)[0].reshape(X.shape))
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('f(x,y)')
ax.set_title('initial GP approximation')
plt.show()
In [169]:
# Bayesian optimization: maximize the EI acquisition, evaluate f at the
# proposed point, append it to the design, and re-fit the GP surrogate.
for i in range(30):
    _, details, _ = optunity.maximize(u, num_evals=100, x=[-6, 6], y=[-6, 6])
    best_ind = np.where(details.call_log['values'] == details.optimum)[0][0]
    x, y = details.call_log['args']['x'][best_ind], details.call_log['args']['y'][best_ind]
    Q = np.append(Q, [[x, y]], axis=0)
    E = f(*Q.T).reshape(-1, 1)
    GP_model = GPy.models.GPRegression(Q, E, kernel=GPy.kern.RBF(2) + GPy.kern.White(2))
    # Fit the kernel hyperparameters -- the original never called optimize(),
    # which the notebook text below identifies as the likely problem.
    GP_model.optimize()
    # Report the true value and the GP prediction at the new point per
    # iteration, as the (previously unanswered) TODO comments requested.
    gp_pred = GP_model.predict(np.array([[x, y]]))[0][0, 0]
    print('iter %2d: f(%.3f, %.3f) = %.4f, GP prediction = %.4f'
          % (i, x, y, E[-1, 0], gp_pred))

We knew something wasn't right. It turns out we forgot to optimize the Gaussian process (no call to `GP_model.optimize()`).

In [171]:
# Contour of the true f with the initial design (first 30 rows of Q) and the
# points acquired by Bayesian optimization overlaid; (0.5, -0.5) is marked as
# the global minimum.
fig = plt.figure(figsize=(10,6))
plt.contourf(X, Y,f_xy,30)
plt.colorbar()
plt.scatter(*Q[:30].T,s=10,label='Q')
plt.scatter(*Q[30:].T,s=10,label='new Q')
plt.scatter(.5,-.5,label='glob. min.')
plt.xlim(-6,6)
plt.ylim(-6,6)
plt.xlabel('x')
plt.ylabel('y')
plt.title('f(x,y)')
plt.legend()
plt.show()
In [179]:
# Surface plot of the GP posterior mean after the EI-only optimization run.
fig = plt.figure(figsize=(6,6))
ax = fig.add_subplot(111, projection='3d')
ax.plot_surface(X, Y, GP_model.predict(coord)[0].reshape(X.shape))
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('f(x,y)')
ax.set_title('final GP approximation')
plt.show()
In [182]:
# Repeat the experiment alternating two utility functions.
def varGP(x, y, gp=None):
    """Pure-exploration utility: predictive variance of the GP at (x, y).

    :param x: candidate x coordinate
    :param y: candidate y coordinate
    :param gp: GPy regression model; defaults to the module-level GP_model,
        looked up at call time.
    :return: predictive variance at (x, y)
    """
    # BUG FIX: the original default `gp=GP_model` was bound once at definition
    # time and therefore ignored every re-fit model.
    if gp is None:
        gp = GP_model
    q = np.array([[x, y]])
    return gp.predict(q)[1]
In [184]:
# Optimization alternating exploitation (EI) on even iterations with
# exploration (GP predictive variance) on odd iterations.
for i in range(30):
    _, details, _ = optunity.maximize(u if i%2==0 else varGP, num_evals=100, x=[-6, 6], y=[-6, 6])
    best_ind = np.where(details.call_log['values'] == details.optimum)[0][0]
    x, y = details.call_log['args']['x'][best_ind], details.call_log['args']['y'][best_ind]
    Q = np.append(Q, [[x, y]], axis=0)
    E = f(*Q.T).reshape(-1, 1)
    GP_model = GPy.models.GPRegression(Q, E, kernel=GPy.kern.RBF(2) + GPy.kern.White(2))
    # Fit the kernel hyperparameters; without this the surrogate keeps its
    # default length scales (the omission noted earlier in the notebook).
    GP_model.optimize()
In [185]:
# Contour of the true f with the initial design and the points acquired by the
# alternating EI/variance run overlaid; (0.5, -0.5) is marked as the global
# minimum.
fig = plt.figure(figsize=(10,6))
plt.contourf(X, Y,f_xy,30)
plt.colorbar()
plt.scatter(*Q[:30].T,s=10,label='Q')
plt.scatter(*Q[30:].T,s=10,label='new Q')
plt.scatter(.5,-.5,label='glob. min.')
plt.xlim(-6,6)
plt.ylim(-6,6)
plt.xlabel('x')
plt.ylabel('y')
plt.title('f(x,y)')
plt.legend()
plt.show()
In [205]:
fig = plt.figure(figsize=(12,6))

# Left panel: final GP posterior-mean surface and its minimizer.
ax = fig.add_subplot(121, projection='3d')
val = GP_model.predict(coord)[0]
ax.plot_surface(X, Y, val.reshape(X.shape))
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('f(x,y)')
ind = np.argmin(val)
# BUG FIX: use the scalar val.min() -- min(val) iterated over the rows of the
# (n, 1) array and %-formatting the resulting length-1 array is deprecated
# (and raises in recent numpy).
ax.set_title('final GP approximation (alternating), min: gp(%.2f, %.2f)=%.2f'
             % (*coord[ind], val.min()))

# Right panel: the true function for visual comparison.
# (The redundant second GP_model.predict(coord) call was removed -- its result
# was never used before plotting f_xy.)
ax = fig.add_subplot(122, projection='3d')
ax.plot_surface(X, Y, f_xy)
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.set_zlabel('f(x,y)')
ax.set_title('f(x,y)')
plt.show()
In [198]:
# Baseline: evaluate f at 60 uniformly random points in [-6, 6]^2 and compare
# their minimum with the best value found by Bayesian optimization.
q_uni = 12 * np.random.random((2, 60)) - 6
f(q_uni[0], q_uni[1]).min()
Out[198]:
1.5138900496600467

Task 3

In [2]:
import time
import scipy.sparse 
import scipy.sparse.linalg
import scipy.spatial
In [24]:
def generalized_exponential_kernel(data, rho, gamma, max_distance):
    """Compute the truncated generalized exponential kernel matrix (sparse).

    Each entry is k(a, b) = exp(-(||a - b|| / rho)**gamma); values below a
    fixed threshold are set to exact zero so the matrix can be stored sparsely.

    :param data: data matrix, shape (n, d)
    :param rho: length-scale parameter rho of the kernel
    :param gamma: exponent parameter gamma (gamma = 2 gives a Gaussian kernel)
    :param max_distance: currently unused -- the truncation threshold below is
        the hard-coded exp(-8); the commented-out expression shows an intended
        max_distance-based threshold. TODO(review): reconcile with the
        max_distance neighborhood used in KernelRidgeRegressor.predict_single.
    :return: sparse (n, n) kernel matrix (symmetric, assembled column-wise)
    """
    assert len(data.shape) == 2
    assert rho > 0
    limit = np.exp(-8)#np.exp(-np.sqrt(max_distance/rho)**(2*gamma))
    # Build the kernel one column at a time (distances from row k to all rows,
    # mapped through exp(-(d/rho)**gamma)), truncating and converting each
    # column to sparse storage immediately to keep peak memory low.
    K = []
    for k in data:
        d = np.exp(-(np.sqrt(np.sum((data - k)**2,axis=1)/rho**2))**gamma)
        d[d < limit] = 0.0  # truncate small kernel values to exact zeros
        d = scipy.sparse.csc_matrix(d[:,None])
        K.append(d)
    K = scipy.sparse.hstack(K)
    return K

def compute_alpha(train_x, train_y, tau, rho, gamma, max_distance):
    """Compute the alpha (dual coefficient) vector of the ridge regressor.

    Solves the regularized system (K + tau*I) alpha = y with conjugate
    gradients on the sparse, truncated kernel matrix.

    :param train_x: training x data, shape (n, d)
    :param train_y: training y data, shape (n,)
    :param tau: regularization parameter tau of the ridge regressor
    :param rho: length-scale parameter rho of the kernel
    :param gamma: exponent parameter gamma of the generalized exponential kernel
    :param max_distance: truncation parameter forwarded to the kernel builder
    :return: alpha vector, shape (n,)
    """
    print("building input kernel matrix")
    K = generalized_exponential_kernel(train_x, rho, gamma, max_distance)
    print("sparsity: %.2f%%" % (float(100*K.nnz) / (K.shape[0]*K.shape[1])))
    M = K + tau * scipy.sparse.identity(train_x.shape[0])
    print("solving sparse system")
    # The dead `y = scipy.sparse.csc_matrix(train_y[:, None])` line was
    # removed -- it was never used (cg takes the dense train_y directly).
    # cg returns (solution, info); info != 0 signals non-convergence.
    alpha, info = scipy.sparse.linalg.cg(M, train_y)
    if info != 0:
        print("warning: CG did not converge (info=%d)" % info)
    print("done computing alpha")
    return alpha
In [25]:
class KernelRidgeRegressor(object):
    """Kernel Ridge Regressor.
    """

    def __init__(self, tau, rho, gamma):
        self.dim = None
        self.train_x = None
        self.alpha = None
        self.mean_y = None
        self.std_y = None
        self.tau = tau
        self.rho = rho
        self.gamma = gamma
        self.scale = -1. / rho**(gamma/2)
        self.max_distance = 4.0*rho

    def train(self, train_x, train_y):
        """Train the kernel ridge regressor.

        :param train_x: training x data
        :param train_y: training y data
        """
        assert len(train_x.shape) == 2
        assert len(train_y.shape) == 1
        assert train_x.shape[0] == train_y.shape[0]

        self.dim = train_x.shape[1]
        self.train_x = train_x.astype(np.float32)
        self.tree = scipy.spatial.cKDTree(self.train_x)

        self.mean_y = train_y.mean()
        self.std_y = train_y.std()
        train_y_std = (train_y - self.mean_y) / self.std_y

        self.alpha = compute_alpha(self.train_x, train_y_std, self.tau, self.rho, self.gamma, self.max_distance)


    def predict_single(self, pred_x):
        """Predict the value of a single instance.

        :param pred_x: x data
        :return: predicted value of pred_x
        """
        assert len(pred_x.shape) == 1
        assert pred_x.shape[0] == self.dim
        indices = np.asarray(self.tree.query_ball_point(pred_x, self.max_distance), dtype=np.dtype("i8"))
        dist = np.sum((self.train_x[indices]-pred_x)**2, axis=1)
        kappa = np.exp(self.scale*dist**(self.gamma/2))
        pred_y = np.dot(kappa, self.alpha[indices])
        return self.std_y * pred_y + self.mean_y

    def predict(self, pred_x):
        """Predict the values of pred_x.

        :param pred_x: x data
        :return: predicted values of pred_x
        """
        assert len(pred_x.shape) == 2
        assert pred_x.shape[1] == self.dim
        pred_x = pred_x.astype(np.float32)
        return np.array([self.predict_single(x) for x in pred_x])
In [26]:
# Cache of trained regressors keyed by (tau, rho, gamma), so repeated calls
# with the same hyperparameters skip the expensive retraining.
KernelRidgeRegressors={}
In [27]:
def kernel_ridge_regression(tau, rho, gamma):
    """Restore the damaged image 'cc_90.png' with kernel ridge regression.

    Trains (or reuses a cached) KernelRidgeRegressor on the non-zero pixels,
    predicts every pixel, shows and saves the restored image.

    :param tau: ridge regularization parameter
    :param rho: kernel length scale
    :param gamma: kernel exponent
    :return: restored image as a 2-D array
    """
    # Load the image.
    im_orig = np.squeeze(plt.imread("cc_90.png"))

    # Make a copy, so both the original and the regressed image can be shown afterwards.
    im = np.array(im_orig)

    # Known pixels (non-zero) are the training data; all coordinates are predicted.
    known_ind = np.where(im != 0)
    known_x = np.array(known_ind).transpose()
    known_y = np.array(im[known_ind])
    coord = np.indices(im.shape).reshape(2, -1).T

    start = time.time()
    print("training...")
    # Reuse a cached trained regressor for these hyperparameters if available;
    # only construct and train a new one on a cache miss (the original built a
    # regressor unconditionally and discarded it on a hit).
    key = (tau, rho, gamma)
    if key in KernelRidgeRegressors:
        r = KernelRidgeRegressors[key]
    else:
        r = KernelRidgeRegressor(tau, rho, gamma)
        r.train(known_x, known_y)
        KernelRidgeRegressors[key] = r
    print("done training")
    print("predicting...")
    y = r.predict(coord)
    print("done predicting")

    # Write the predicted values back into the image and show the result.
    im = y.reshape(im_orig.shape)
    stop = time.time()
    print("Train and predict took %.02f seconds." % (stop-start))
    fig = plt.figure(figsize=(6,8))
    plt.imshow(im,cmap='gray')
    plt.axis('off')
    plt.title(r'$\tau=%.2e, \rho=%.3f, \gamma=%.3f$'%(tau,rho,gamma))
    plt.savefig("restored/res_%s_%s_%s.png"%(tau,rho,gamma))
    plt.show()
    # Close the figure: this function is called 30+ times per experiment and
    # matplotlib otherwise keeps every figure alive (see the >20-figures
    # RuntimeWarning in the training log).
    plt.close(fig)
    return im
  • Solving $Ax = b$ for $x$ is generally faster than computing the matrix inverse explicitly as $x = A^{-1}b$. In this case, conjugate gradient iteration is used to solve the linear equation system.
  • Sparse matrices are used to speed up the computation, since the kernel matrix has many zero entries.

Task 4

In [7]:
# Reference image: the original (undamaged) Charlie Chaplin picture.
B=plt.imread('charlie-chaplin.jpg')
In [8]:
def COR(tau,rho,gamma,B=B):
    """Pearson correlation between the restored image and the reference image.

    :param tau: ridge regularization parameter (forwarded)
    :param rho: kernel length scale (forwarded)
    :param gamma: kernel exponent (forwarded)
    :param B: reference image; defaults to the module-level original image
        (assumed to have the same shape as the restored image -- TODO confirm)
    :return: Pearson correlation coefficient
    """
    A = kernel_ridge_regression(tau, rho, gamma)
    # Vectorized Pearson correlation: the original nested Python loops visited
    # every pixel individually; this computes the same sums with numpy array
    # arithmetic. B may be integer-typed (imread of a JPEG), so cast to float
    # before subtracting the mean, matching the loop's implicit promotion.
    a = A.astype(float) - A.mean()
    b = B.astype(float) - B.mean()
    numerator = (a * b).sum()
    denominator = np.sqrt((a * a).sum()) * np.sqrt((b * b).sum())
    return numerator / denominator
In [9]:
# Sobol design of 30 hyperparameter triples (tau, rho, gamma) inside the box
# [0.005, 1] x [1, 7] x [1, 4].
number_of_samples = 30
parameterUpperLimits = np.array([1, 7, 4])
parameterLowerLimits = np.array([.005, 1, 1])
span = parameterUpperLimits - parameterLowerLimits
thetas = np.array([
    sobol.i4_sobol(3, i)[0] * span + parameterLowerLimits
    for i in range(number_of_samples)
])
In [28]:
# Create training data: run the full restore-and-correlate pipeline once per
# hyperparameter triple.
cors = []
for theta in thetas:
    print('hyperparameters:', theta)
    cors.append(COR(*theta))
cors = np.array(cors)
hyperparameters: [0.005 1.    1.   ]
training...
building input kernel matrix
sparsity: 0.24%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 12.26 seconds.
hyperparameters: [0.5025 4.     2.5   ]
training...
building input kernel matrix
sparsity: 0.31%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 15.77 seconds.
hyperparameters: [0.75125 2.5     3.25   ]
training...
building input kernel matrix
sparsity: 0.09%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 15.37 seconds.
hyperparameters: [0.25375 5.5     1.75   ]
training...
building input kernel matrix
sparsity: 1.17%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 15.52 seconds.
hyperparameters: [0.378125 3.25     2.875   ]
training...
building input kernel matrix
sparsity: 0.17%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 15.68 seconds.
hyperparameters: [0.875625 6.25     1.375   ]
training...
building input kernel matrix
sparsity: 2.76%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 12.22 seconds.
hyperparameters: [0.626875 1.75     2.125   ]
training...
building input kernel matrix
sparsity: 0.09%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 15.08 seconds.
hyperparameters: [0.129375 4.75     3.625   ]
training...
building input kernel matrix
sparsity: 0.26%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 37.28 seconds.
hyperparameters: [0.1915625 2.875     1.9375   ]
training...
building input kernel matrix
sparsity: 0.26%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 15.21 seconds.
hyperparameters: [0.6890625 5.875     3.4375   ]
training...
building input kernel matrix
sparsity: 0.42%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 21.13 seconds.
hyperparameters: [0.9378125 1.375     2.6875   ]
training...
building input kernel matrix
sparsity: 0.04%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 15.87 seconds.
hyperparameters: [0.4403125 4.375     1.1875   ]
training...
building input kernel matrix
sparsity: 2.20%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 10.39 seconds.
hyperparameters: [0.3159375 2.125     3.8125   ]
training...
building input kernel matrix
sparsity: 0.06%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 16.11 seconds.
hyperparameters: [0.8134375 5.125     2.3125   ]
training...
building input kernel matrix
sparsity: 0.57%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 16.84 seconds.
hyperparameters: [0.5646875 3.625     1.5625   ]
training...
building input kernel matrix
sparsity: 0.68%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 15.35 seconds.
hyperparameters: [0.0671875 6.625     3.0625   ]
training...
building input kernel matrix
sparsity: 0.63%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 64.92 seconds.
hyperparameters: [0.09828125 3.8125     3.53125   ]
training...
building input kernel matrix
sparsity: 0.18%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 38.33 seconds.
hyperparameters: [0.59578125 6.8125     2.03125   ]
training...
building input kernel matrix
sparsity: 1.26%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 17.34 seconds.
hyperparameters: [0.84453125 2.3125     1.28125   ]
training...
building input kernel matrix
sparsity: 0.51%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 14.50 seconds.
hyperparameters: [0.34703125 5.3125     2.78125   ]
training...
building input kernel matrix
sparsity: 0.47%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 19.37 seconds.
hyperparameters: [0.47140625 1.5625     2.40625   ]
training...
building input kernel matrix
sparsity: 0.06%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 15.63 seconds.
 C:\Users\user\AppData\Local\Continuum\anaconda3\lib\site-packages\matplotlib\pyplot.py:514: RuntimeWarning:More than 20 figures have been opened. Figures created through the pyplot interface (`matplotlib.pyplot.figure`) are retained until explicitly closed and may consume too much memory. (To control this warning, see the rcParam `figure.max_open_warning`).
hyperparameters: [0.96890625 4.5625     3.90625   ]
training...
building input kernel matrix
sparsity: 0.22%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 16.69 seconds.
hyperparameters: [0.72015625 3.0625     3.15625   ]
training...
building input kernel matrix
sparsity: 0.14%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 16.11 seconds.
hyperparameters: [0.22265625 6.0625     1.65625   ]
training...
building input kernel matrix
sparsity: 1.60%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 15.59 seconds.
hyperparameters: [0.16046875 1.9375     2.59375   ]
training...
building input kernel matrix
sparsity: 0.08%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 15.70 seconds.
hyperparameters: [0.65796875 4.9375     1.09375   ]
training...
building input kernel matrix
sparsity: 3.68%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 7.47 seconds.
hyperparameters: [0.90671875 3.4375     1.84375   ]
training...
building input kernel matrix
sparsity: 0.41%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 15.79 seconds.
hyperparameters: [0.40921875 6.4375     3.34375   ]
training...
building input kernel matrix
sparsity: 0.51%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 34.20 seconds.
hyperparameters: [0.28484375 2.6875     1.46875   ]
training...
building input kernel matrix
sparsity: 0.45%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 15.20 seconds.
hyperparameters: [0.78234375 5.6875     2.96875   ]
training...
building input kernel matrix
sparsity: 0.49%
solving sparse system
done computing alpha
done training
predicting...
done predicting
Train and predict took 17.80 seconds.
In [29]:
# Persist the correlation scores so the expensive training loop need not rerun.
np.savetxt('correlations.txt',cors)
In [35]:
# GP over hyperparameters -> correlation score (default RBF kernel).
# NOTE(review): GPy.kern.Matern52 is a class and needs the input dimension,
# e.g. kernel=GPy.kern.Matern52(3); passing the uncalled class object is why
# the commented-out kernel argument failed (see the note below).
GP2=GPy.models.GPRegression(thetas,cors.reshape(-1,1))#,kernel=GPy.kern.Matern52)

We made a mistake in forgetting that GPy.kern.Matern52 needed an argument (the input dimension, here 3) to work, i.e. GPy.kern.Matern52(3). That's why we couldn't continue the exercise. We definitely had trouble with the GPy package. The rest seems to be in order.

In [ ]: